HTML.XIL
is the default indexsheet for HTML content. The HTML.XIL
indexsheet includes definitions for common fields, handling of hit-anchors and
hit-highlighting, indexing of title tags, creation of table of contents
structure, and handling of break words.
<?xml version='1.0'?>
<!-- Default indexsheet for HTML -->
<!--
  Contents, in order:
    1. Field definitions (dc:title, dc:creator, dc:subject, dc:description).
    2. META rules that index the "content" attribute into those fields.
    3. Hit-anchor / hit-hilite rules for A, HEAD and TITLE.
    4. Rules for SCRIPT and STYLE (index="no").
    5. Table of contents (TOC) generation from headings H1 to H6.
    6. Paragraph proximity, hit-total and break-word rules.
-->
<xsl:stylesheet case-sensitive="no"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:np="http://www.rocketsoftware.com/ns/indexsheet/2.0"
                extension-element-prefixes="np">

  <!-- Common fields; every field shares the same type, term-list, proximity and relevance settings -->
  <np:definitions>
    <field name="dc:title" type="text" term-list="yes" proximity="no" relevance="highest"/>
    <field name="dc:creator" type="text" term-list="yes" proximity="no" relevance="highest"/>
    <field name="dc:subject" type="text" term-list="yes" proximity="no" relevance="highest"/>
    <field name="dc:description" type="text" term-list="yes" proximity="no" relevance="highest"/>
  </np:definitions>

  <!-- META name="description": index the "content" attribute into dc:description -->
  <xsl:template match='META[@name="description"]'>
    <np:index-attribute name="content" field="dc:description"/>
  </xsl:template>

  <!-- META name="author": index the "content" attribute into dc:creator -->
  <xsl:template match='META[@name="author"]'>
    <np:index-attribute name="content" field="dc:creator"/>
  </xsl:template>

  <!-- META name="keywords": index the "content" attribute into dc:subject -->
  <xsl:template match='META[@name="keywords"]'>
    <np:index-attribute name="content" field="dc:subject"/>
  </xsl:template>

  <!-- hit-anchors are not allowed within an HTML "A" element which is a link -->
  <!-- Because of its importance, this rule is enforced internally for HTML if not specified -->
  <xsl:template match="A[attribute(HREF)]">
    <np:index hit-anchor="postpone">
      <xsl:apply-templates/>
    </np:index>
  </xsl:template>

  <!-- Neither hit-anchor nor hit-hilite is allowed within HTML "HEAD" element -->
  <!-- A hit can occur within HTML "HEAD" element when indexing TITLE or other text in heading -->
  <xsl:template match="HEAD">
    <np:index hit-anchor="no" hit-hilite="no">
      <xsl:apply-templates/>
    </np:index>
  </xsl:template>

  <!-- It is better to not index title when it is the same for all documents or the same as first heading -->
  <!-- However the HTML "TITLE" element can be indexed as long as a rule is used to not allow hit-anchor nor hit-hilite -->
  <!-- This default rule uses index="no", the same setting the SCRIPT and STYLE rules below use -->
  <xsl:template match="TITLE">
    <np:index index="no">
      <xsl:apply-templates/>
    </np:index>
  </xsl:template>

  <!-- Do not index SCRIPT -->
  <xsl:template match="SCRIPT">
    <np:index index="no" hit-hilite="no">
      <xsl:apply-templates/>
    </np:index>
  </xsl:template>

  <!-- Do not index STYLE -->
  <xsl:template match="STYLE">
    <np:index index="no" hit-hilite="no">
      <xsl:apply-templates/>
    </np:index>
  </xsl:template>

  <!-- Generate sub-document table of contents (TOC) hierarchy from HTML headings H1 to H6 -->
  <!-- The first heading found is used as document title -->
  <xsl:template match="H1|H2|H3|H4|H5|H6">
    <np:index toc-heading="title-HTML" title-field="dc:title">
      <xsl:apply-templates/>
    </np:index>
  </xsl:template>

  <!-- proximity="paragraph" marks paragraphs for paragraph proximity searching and automatic abstract generation -->
  <!-- break-word is needed when the P element is used without surrounding whitespace to prevent words from being stuck together -->
  <xsl:template match="P">
    <np:index proximity="paragraph" break-word="yes">
      <xsl:apply-templates/>
    </np:index>
  </xsl:template>

  <!-- A hit-total replace tag is placed at end of BODY element. The form generated by it is required for next/prev hit functionality -->
  <xsl:template match="BODY">
    <np:index hit-total="yes" break-word="yes">
      <xsl:apply-templates/>
    </np:index>
  </xsl:template>

  <!-- break-word rule is needed when the following elements are used without surrounding whitespace to prevent words from being stuck together -->
  <!-- Word breaks rules are now included by default, but can optionally be edited or removed from indexsheet for HTML -->
  <xsl:template match="ADDRESS|BR|BLOCKQUOTE|BUTTON|CENTER|DD|DT|DIV|FORM|FRAME|HR|IFRAME|IMG|INPUT|ISINDEX|LI|NOFRAMES|NOSCRIPT|NOEMBED|OBJECT|OPTION|PRE|PLAINTEXT|SPACER|TR|TD|TH|TABLE|TEXTAREA|WBR">
    <np:index break-word="yes">
      <xsl:apply-templates/>
    </np:index>
  </xsl:template>

</xsl:stylesheet>
Copyright © 2006-2023, Rocket Software, Inc. All rights reserved.